# Simulate coalescent process with heterochronous sampling times

# Assumptions and modifications
# - fixes frac but changes sample numbers
# - model selection over square waves of 2^p segments
# - deposits batch runs in a single folder
# - samples placed isochronously at 0 and tshift 
# - tperiod is a multiple of tshift

# Clean the workspace and console
closeAllConnections(); rm(list=ls())
cat("\014"); graphics.off()

# Packages for phylodyn
library("sp"); library("devtools"); library("INLA")
library("spam"); library("ape"); library("phylodyn")

# Set working directory to source
this.dir <- dirname(parent.frame(2)$ofile)
setwd(this.dir)

# Function to write simple csv files to correct path
tableWrite <- function(val, name, pathname) {
  # Add path to name
  str0 <- paste(c(pathname, name), collapse = "")
  # Write table
  write.table(val, str0, row.names=FALSE, col.names=FALSE, sep=",")
}

# Main code for heterochronous simulations ----------------------------------------------------------

# Shift time (min period of square wave)
tshift = 200; nsegs = 16
# Square wave range and numbers
frac = 0.5; nwave = log2(nsegs)+1
# Period sizes (decreasing)
tperiod = (nwave-1):0; tperiod = 2^tperiod
# Waves (define sequence of fr)
wave = matrix(1, nrow = nwave, ncol = nsegs)
nPeriods = matrix(0, 1, nwave)
for(i in 2:nwave){
  # No. periods and starting amplitude
  nPeriods[i] = nsegs/tperiod[i]
  ampLast = 1
  for(j in 1:nPeriods[i]){
    wave[i, seq((j-1)*tperiod[i]+1, j*tperiod[i])] = ampLast
    if(ampLast == 1){
      ampLast = frac
    } else {
      ampLast = 1
    }
  }
}

# No. square wave trajectories
numTraj = 500; Nmax = 300;
# Randomly pick trajectories
idTraj = sample(1:nwave, numTraj, replace=TRUE, prob = rep(1, nwave)/nwave)

# Range of sample numbers per segment to loop across 
nSamps = seq(51, 201, 10)
numRuns = length(nSamps)
# Sample times
samptimes = seq(0, (nsegs-1)*tshift, by = tshift)

for (j in 1:numRuns) {
  # Number of samples introduced at each time
  nsamps = nSamps[j]
  samps = nsamps*rep(1, nsegs)
  # Coalescent events in each trajectory
  nc = rep(0, numTraj)
  
  # Create folder for traj specific results
  trajName = paste(c('sqwave_', nsamps-1), collapse = '')
  dir.create(file.path(this.dir, trajName))
  pathf = paste(c(this.dir, '/', trajName, '/'), collapse = "")
  
  # Define square wave population size
  sqwave_traj <- function (t, wav = wave[idTraj[j],], N = Nmax, tseg = samptimes) 
  {
    popsize = rep(N, length(t))
    # Changing population based on wave
    for(i in 1:nsegs){
      popsize[t <= tseg[i] & t > tseg[i-1]] = wav[i]*N
    }
    # Beyond last wave segment hold last popsize
    popsize[t > tseg[nsegs]] = wav[nsegs]*N
    return(popsize)
  }
  traj = sqwave_traj
  
  for(i in 1:numTraj){
    # Simulate genealogy and get all times
    gene = coalsim(samp_times = samptimes, n_sampled = samps, traj = traj, lower_bound = 10, method = "thin")
    coal_times = gene$coal_times
    coalLin = gene$lineages
    
    # TMRCA and no. coalescent events
    tmax = max(coal_times)
    nc[i] = length(coal_times)
    
    # Obtain true trajectory across time
    t = seq(0, tmax, length=10^4)
    y = traj(t)
    
    # Export trajectory specific data for Matlab
    tableWrite(coal_times, paste(c('coaltimes', i, '.csv'), collapse = ''), pathf)
    tableWrite(coalLin, paste(c('coalLin', i, '.csv'), collapse = ''), pathf)
  }
  
  # No. samples, coalescences and times
  tableWrite(nc, 'nc.csv', pathf)
  tableWrite(samptimes, 'samptimes.csv', pathf)
  tableWrite(samps, 'sampIntro.csv', pathf)
  # Data on square wave models
  tableWrite(numRuns, 'numRuns.csv', pathf)
  tableWrite(numTraj, 'numTraj.csv', pathf)
  tableWrite(wave, 'wave.csv', pathf)
  tableWrite(idTraj, 'idTraj.csv', pathf)
  tableWrite(c(Nmax, frac*Nmax), 'Nrange.csv', pathf)
}


